// STL
#include <cmath>      // std::fabs
#include <complex>    // std::complex<>
#include <cstddef>    // std::size_t
#include <functional> // std::function<>
#include <iostream>   // std::cout
#include <vector>     // std::vector<>


//
// DESC: Perform kernel LDA on a vector set LDA_vector
//
// INPUT:
//          nC                : number of classes
//          LDA_vector        : total set of vectors
//          LDA_vector_sep    : set of vectors, separated by classes
//          kernel            : kernel function k(X,X')
//
// OUTPUT:
//          alpha             : values of alpha
//
// NOTES:
//     ! the following algorithm is from the paper:
//          S. Mika et al. ``Fisher discriminant analysis with kernels'' in Neural Networks for Signal Processing IX, 1999. Proceedings of the 1999 IEEE Signal Processing Society Workshop.
//     ! !!! JMM: I think that below is correct, I modified the two class algorithm to be more general (BUT IS NOT COMPLETE YET FOR MULTICLASS PROBLEMS)
//
std::vector<std::vector<double>> kernelLDA(
                                           const int                                                              nC,
                                           // -----
                                           const std::vector<std::vector<double>>                                &LDA_vector,
                                           const std::vector<std::vector<std::vector<double>>>                   &LDA_vector_sep,
                                           // -----
                                           const std::function<double(std::vector<double>, std::vector<double>)> &kernel
                                           )
{
    std::vector<std::vector<double>> alpha;


    // SET THE REGULARIZATION PARAMETER FOR MATRIX N
    // ! Mueller uses MU = 10^-3
    const double MU = 1.0E-3;
    // SET THE TOLERANCE FOR INVERTING MATRIX N (IF SINGULAR)
    const double TOL = 1.0E-3;
    // SET THE TOLERANCE FOR ZERO-ING N MATRIX FOR *VERY* SMALL VALUES
    const double ZEROTOL = 1.0E-6;

    //*********************************************************


    //=========================================================
    // ERROR CHECK(S)
    //=========================================================

    if( nC != 2 ) { std::cout << "Error in kernelLDA(): nC != 2, which is not described in Mueller paper! NOT IMPLEMENTED FULLY!" << std::endl; }

    if( LDA_vector_sep.size() != nC ) { std::cout << "Error in kernelLDA(): LDA_vector_sep.size() != nC!" << std::endl; }


    //=========================================================
    // FORM THE M VECTORS: (M_i)_j = (1/l_i)*sum{k=1,l_i}[k(x_j, x_k^i)] [size: l x 1]
    //=========================================================
    // ! see just after Eq. (6)

    vector<vector<double>> M(nC, vector<double>( LDA_vector.size(), 0.0 ));

    // FOR EACH CLASS i ...
    for(int i = 0; i < nC; ++i)
    {
        // FOR EACH POINT j IN THE TOTAL SET OF LDA_vectors (ROW j IN M_i)...
        for( decltype(LDA_vector.size()) j = 0; j < LDA_vector.size(); ++j)
        {
            // SUM OVER EACH POINT k IN THE CLASS i & CALCULATE k(x_j, x_k^i)
            M[i][j] = 0.0;
            for( decltype(LDA_vector_sep[i].size()) k = 0; k < LDA_vector_sep[i].size(); ++k) { M[i][j] += kernel( LDA_vector[j], LDA_vector_sep[i][k] ); }
            M[i][j] /= static_cast<double>( LDA_vector_sep[i].size() );
        }
    }


    //=========================================================
    // FORM THE M MATRIX: M = (M1 - M2)*(M1 - M2)^T [size: l x l]
    //=========================================================
    // ! see just after Eq. (7)

    vector<vector<double>> Mmat( LDA_vector.size(), vector<double>( LDA_vector.size(), 0.0 ) );

    for( decltype(LDA_vector.size()) i = 0; i < LDA_vector.size(); ++i)
    {
        for( decltype(LDA_vector.size()) j = 0; j < LDA_vector.size(); ++j)
        {
            Mmat[i][j] = (M[0][i] - M[1][i])*(M[0][j] - M[1][j]);
        }
    }


    //=========================================================
    // FORM THE K_j MATRICES: (K_j)nm = k(x_n, x_m^j) [size: l x lj]
    //=========================================================
    // ! see just after Eq. (8)

    vector<vector<vector<double>>> Kmat(nC, vector<vector<double>>(LDA_vector.size()));

    // FOR EACH CLASS i ...
    for(int i = 0; i < nC; ++i)
    {
        // FOR EACH POINT n IN THE TOTAL SET l ...
        for( decltype(LDA_vector.size()) n = 0; n < LDA_vector.size(); ++n)
        {
            // FIRST FINISH ALLOCATING Kmat AS THE SIZE lj ...
            Kmat[i][n].resize( LDA_vector_sep[i].size() );

            // ... NOW, FOR EACH POINT IN SUBSET lj, ASSIGN k(x_n, x_m^j) ...
            for( decltype(LDA_vector_sep[i].size()) m = 0; m < LDA_vector_sep[i].size(); ++m) { Kmat[i][n][m] = kernel( LDA_vector[n], LDA_vector_sep[i][m] ); }
        }
    }


    //=========================================================
    // FORM THE N MATRIX: N = sum{j=1,nC}[K_j*(I - 1_lj)*K_j^T] [size: l x l]
    //=========================================================
    // ! see just after Eq. (8)

    vector<vector<double>> N( LDA_vector.size(), vector<double>( LDA_vector.size(), 0.0 ));

    // FOR EACH CLASS i ...
    for(int i = 0; i < nC; ++i)
    {
        // CALULATE THE RIGHT-SIDE MATRIX I_1lj = (I - 1_lj)*K_j^T [size: lj x l]:
        // ... FIRST CALCULATE (I - 1_lj), where (1_lj = 1/lj[j]) [size: lj x lj] ...
        vector<vector<double>> I_1lj( LDA_vector_sep[i].size(), vector<double>( LDA_vector_sep[i].size(), -1.0/static_cast<double>( LDA_vector_sep[i].size() ) ) );
        for(unsigned int j = 0; j < LDA_vector_sep[i].size(); ++j) { I_1lj[j][j] += 1.0; }
        // ... AND NOW CALCULATE (I - 1_lj)*K_j^T
        vector<vector<double>> KjT( LDA_vector_sep[i].size(), vector<double>(  LDA_vector.size(), 0.0 ) );
        for(decltype(LDA_vector_sep[i].size()) j = 0; j < LDA_vector_sep[i].size(); ++j)
        {
            for( decltype(LDA_vector.size()) k = 0; k < LDA_vector.size(); ++k)
            {
                KjT[j][k] = 0.0;
                for( decltype(LDA_vector_sep[i].size()) l = 0; l < LDA_vector_sep[i].size(); ++l) { KjT[j][k] += ( I_1lj[j][l]*Kmat[i][k][l] ); }
            } // ++j
        } // ++i

        // FINALLY CALCULATE K_j*(I - 1_lj)*K_j^T:
        for( decltype(LDA_vector.size()) j = 0; j < LDA_vector.size(); ++j)
        {
            for( decltype(LDA_vector.size()) k = 0; k < LDA_vector.size(); ++k)
            {
                for( decltype(LDA_vector.size()) l = 0; l < LDA_vector_sep[i].size(); ++l) { N[j][k] += ( Kmat[i][j][l]*KjT[l][k] ); }
            } // ++j
        } // ++i
    }

    // REGULARIZATION: N_mu = N + mu*I
    // ! see Eq. (11)
    // ! since we are estimating l-dimensional covariances structures from l samples, the problem is ill-posed
    for( decltype(LDA_vector.size()) j = 0; j < LDA_vector.size(); ++j) { N[j][j] += MU; }

    // DISCARD VERY LOW VALUES IN N
    // !!! JMM: *I think* this should be OK
    for( decltype(LDA_vector.size()) i = 0; i < LDA_vector.size(); ++i)
    {
        for( decltype(LDA_vector.size()) j = 0; j < LDA_vector.size(); ++j)
        {
            if( fabs(N[i][j]) < ZEROTOL ) { N[i][j] = 0.0; }
        }
    }



    //=========================================================
    // SETUP SVD STUFF
    //=========================================================

    // ! IMPORTANT: the SVD routine does not use C++ 0 offsets
    // A[1..m][1..n]
    // W[1..n], (diagonal matrix of [1..n][1..n])
    // V[1..n][1..n]
    // ! recall that from SVD, U is not stored in A, and V is returned AS V, NOT V^T

    int m = LDA_vector.size();    // num rows of A
    int n = LDA_vector.size();    // num colums of A

    double **A = new double * [m+1];
    for(int i = 0; i <= m; ++i) { A[i] = new double [n+1]; }
    double *w = new double [n+1];
    double **V;
    V   = new double * [n+1];
    for(int i = 0; i <= n; ++i) { V[i]  = new double [n+1]; }


    //=========================================================
    // INVERT N USING SVD
    //=========================================================

    // COPY THE MATRIX A = N
    for(int i = 0; i < m; ++i)
    {
        for(int j = 0; j < n; ++j)
        {
            A[i+1][j+1] = N[i][j];
        }
    }

    // PERFORM SVD
    // ! recall that U is stored in A, and V is returned AS V, NOT V^T
    svdcmp(A, m, n, w, V);

    // CHECK IF N IS SINGULAR
    // ! N is nonsingular iff ALL its eigenvalues are not 0 (*BUT* for numerical issues not less than TOL)
    bool singular = false;
    for(int i = 0; i < n; ++i)
    {
        if( w[i+1] < TOL )
        {
            singular = true;
            break;
        }
    }

    if( singular ) { std::cout << "Warning in kernelLDA(): N is (or is nearly) singular. Only an approximation to N^-1 will be obtained." << std::endl; }

    // CALCULATE W^-1 (THE INVERSE OF SINGULAR VALUES)
    // ! if N *IS* singular, we can still get an APPROXIMATION to N^-1 by considering very small singular values as 0
    // ! *NO* C++ offsets in Dinv
    vector<vector<double>> Winv( n, vector<double>( n, 0.0 ) );
    for(int i = 0; i < n; ++i)
    {
        if( w[i+1] >= TOL ) { Winv[i][i] = 1.0/w[i+1]; }
        else                { Winv[i][i] = 0.0; }
    }

    // CALCULATE W^-1*U^T
    // !!! JMM: can be made faster by not using a matrix for Winv
    vector<vector<double>> WinvUT( LDA_vector.size(), vector<double>( LDA_vector.size(), 0.0 ) );

    for( decltype(LDA_vector.size()) i = 0; i < LDA_vector.size(); ++i)
    {
        for( decltype(LDA_vector.size()) j = 0; j < LDA_vector.size(); ++j)
        {
            for( decltype(LDA_vector.size()) k = 0; k < LDA_vector.size(); ++k) { WinvUT[i][j] += ( Winv[i][k]*A[j+1][k+1] ); }
        }
    }

    // CALCULATE N^-1 = V*W^-1*U^T
    // ! recall that from SVD that N = U*W*V^T
    vector<vector<double>> Ninv( LDA_vector.size(), vector<double>( LDA_vector.size(), 0.0 ) );

    for( decltype(LDA_vector.size()) i = 0; i < LDA_vector.size(); ++i)
    {
        for( decltype(LDA_vector.size()) j = 0; j < LDA_vector.size(); ++j)
        {
            for( decltype(LDA_vector.size()) k = 0; k < LDA_vector.size(); ++k) { Ninv[i][j] += ( V[i+1][k+1]*WinvUT[k][j] ); }
        }
    }

    // N^-1 SHOULD BE SYMMETRIC -- BUT IS PERHAPS NOT, DUE TO ROUNDOFF IN SVD
    for( decltype(LDA_vector.size()) i = 0; i < LDA_vector.size(); ++i)
    {
        for( decltype(LDA_vector.size()) j = 0; j < i; ++j) { Ninv[j][i] = Ninv[i][j]; }
    }

    //=========================================================
    // CLEANUP SVD
    //=========================================================

    for(int i = 0; i <= m; ++i) { delete [] A[i]; }
    delete [] A;
    delete [] w;
    for(int i = 0; i <= n; ++i) { delete [] V[i]; }
    delete [] V;


    //=========================================================
    // CALCULATE A = (N^-1)*M
    //=========================================================

    vector<vector<double>> NinvM( LDA_vector.size(), vector<double>( LDA_vector.size(), 0.0 ) );

    for( decltype(LDA_vector.size()) i = 0; i < LDA_vector.size(); ++i)
    {
        for( decltype(LDA_vector.size()) j = 0; j < LDA_vector.size(); ++j)
        {
            for( decltype(LDA_vector.size()) k = 0; k < LDA_vector.size(); ++k) { NinvM[i][j] += ( Ninv[i][k]*Mmat[k][j] ); }
        }
    }

    /*
    //=========================================================
    // FIND LEADING  EIGENVALUE & EIGENVECTOR VIA POWER METHOD
    //=========================================================
    // ! see algorithm 4.1 (power method) in:
    //     http://www.math.washington.edu/~morrow/498_13/eigenvalues.pdf

    vector<double> x( LDA_vector.size(), 0.0 ), y( LDA_vector.size(), 0.0 );
    double lambda, lambda_prev;

    // RANDOMIZE VECTOR b ...
    for(int i = 0; i < LDA_vector.size(); ++i) { x[i] = rng_uniform_real_dist_MersTwist(0.0, 1.0); }
    // ... AND NORMALIZE BY (x^T)*x
    double xTx = 0.0;
    for(int i = 0; i < LDA_vector.size(); ++i) { xTx += ( x[i]*x[i] ); }
    for(int i = 0; i < LDA_vector.size(); ++i) { x[i] /= xTx; }

    // NOW ITERATE
    lambda = -1.0E36;
    do
    {
        // ASSIGN PREVIOUS LAMBDA
        lambda_prev = lambda;

        // CALCULATE y_{i+1} = A*x_i
        for(int i = 0; i < LDA_vector.size(); ++i)
        {
            y[i] = 0.0;
            for(int k = 0; k < LDA_vector.size(); ++k) { y[i] += ( NinvM[i][k]*x[k] ); }
        }

        // CALCULATE (APPROXIMATE) EIGENVALUE
        // ! IMPORTANT: this gives the eigenvalue at step i *NOT* i+1
        // ! the reason this is done here is because we now have A*x_i (y_{i+1}) and x_i available
        lambda = 0.0;
        for(int i = 0; i < LDA_vector.size(); ++i) { lambda += ( x[i]*y[i] ); }

        // CALCULATE ||y_{i+1}||^2
        double y2 = 0.0;
        for(int i = 0; i < LDA_vector.size(); ++i) { y2 += ( y[i]*y[i] ); }

        // CALCULATE (APPROXIMATE) EIGENVECTOR x_{i+1} = y_{i+1}/||y_{i+1}||^2
        for(int i = 0; i < LDA_vector.size(); ++i) { x[i] = y[i]/y2; }

        // ! APPROXIMATE EIGENVALUE AT {i+1} WILL BE COMPUTED ON NEXT ITERATION
        std::cout << "eigenvalue: " << lambda << std::endl;

    } while( fabs( lambda - lambda_prev ) > EIGENTOL );

    // ASSIGN LEADING EIGENVECTOR TO ALPHA
    alpha = vector<vector<double>>( 1, vector<double>( LDA_vector.size(), 0.0 ) );
    for(int i = 0; i < LDA_vector.size(); ++i) { alpha[0][i] = x[i]; }
    */

    //=========================================================
    // FIND EIGENVALUES & EIGENVECTORS OF NinvM
    //=========================================================

    // SET TO ONLY COMPUTE RIGHT EIGENVECTORS
    char JOBVL = 'N';
    char JOBVR = 'V';
    // SET ORDER OF MATRIX & LEADING DIMENSION (SAME -- WORK WITH WHOLE MATRIX), AND STORE IN *COLUMN-MAJOR* FORMAT
    int order = LDA_vector.size();
    int LDA   = LDA_vector.size();
    vector<double> Amat;
    for( decltype(LDA_vector.size()) j = 0; j < LDA_vector.size(); ++j) { for( decltype(LDA_vector.size()) i = 0; i < LDA_vector.size(); ++i) { Amat.push_back(NinvM[i][j]); } }
    // SETUP ARRAYS FOR REAL & IMAGINARY PARTS OF EIGENVALUES
    vector<double> WR( LDA_vector.size() ), WI( LDA_vector.size() );
    // SET LEADING DIMENSIONS FOR EIGENVECTORS (LEFT & RIGHT) & SETUP ARRAYS
    int LDVL = LDA_vector.size();
    int LDVR = LDA_vector.size();
    vector<double> VL( LDVL*order );
    vector<double> VR( LDVL*order );
    // SET DIMENSION OF WORK ARRAY & SETUP WORK ARRAY
    int LWORK = 4*order;
    vector<double> work( LWORK );
    // SETUP AN INFO FLAG
    int INFO;

    // CALL DGEEV
    // ! IMPORTANT: all values must be passed by address, *NOT* value
    // ! IMPORTANT: matrices are stored by column-major, *NOT* row-major
    dgeev_( &JOBVL, &JOBVR, &order, &*Amat.begin(), &LDA, &*WR.begin(), &*WI.begin(), &*VL.begin(), &LDVL, &*VR.begin(), &LDVR, &*work.begin(), &LWORK, &INFO );

    // CHECK THAT RETURN WAS NORMAL
    if( INFO != 0 ) { std::cout << "Error in kernelLDA(): INFO did not return as 0!" << endl; }

    //---------------------------------------------------------
    // ASSIGN EIGENVALUES & EIGENVECTORS INTO EASY TO UNDERSTAND FORMAT
    //---------------------------------------------------------

    vector<complex<double>> eigenvalue( LDA_vector.size() );
    vector<double>          eigenvalue2( LDA_vector.size() );
    vector<vector<double>> eigenvector( LDA_vector.size(), vector<double>( LDA_vector.size(), 0.0 ) );

    // ASSIGN EIGENVALUES & FIND LEADING ONE
    double maxeigen = -1.0E36;
    int imaxeigen   = -1;
    for( decltype(LDA_vector.size()) i = 0; i < LDA_vector.size(); ++i)
    {
        eigenvalue[i]  = complex<double>(WR[i], WI[i]);
        eigenvalue2[i] = ( WR[i]*WR[i] + WI[i]*WI[i] );

        if( eigenvalue2[i] > maxeigen )
        {
            maxeigen  = eigenvalue2[i];
            imaxeigen = i;
        }
    }

    // ENSURE WE FOUND A MAX. EIGENVECTOR
    if( imaxeigen == -1 ) { std::cout << "Error in kernelLDA(): Did not find a leading eigenvalue!" << endl; }

    // ASSIGN EIGENVECTORS & LEADING ONE TO ALPHA
    alpha = vector<vector<double>>( 1, vector<double>( LDA_vector.size(), 0.0 ) );
    for( decltype(LDA_vector.size()) j = 0; j < LDA_vector.size(); ++j)
    {
        for( decltype(LDA_vector.size()) i = 0; i < LDA_vector.size(); ++i)
        {
            eigenvector[i][j] = VR[j*LDA_vector.size() + i];

            // IF THIS IS THE LEADING EIGENVECTOR, ASSIGN TO ALPHA ...
            if( j == imaxeigen ) { alpha[0][i] = VR[j*LDA_vector.size() + i]; }
        }
    }

    return alpha;
}
